Run UMAP on CD4+ events which express a COMPASS subset. Repeat for CD8 events.
The question being asked is “What are the memory and activation profiles of Ag-specific T cells?”
This time around, don’t sample the events.
Don’t stratify by groups, but rather color the sub-localization of the different markers.
Color by Cohort and Antigen (S1, S2, NCAP, VEMP, including DMSO)
Also color by
- Degree of functionality
- Cytokine - CD45RA
- CCR7
- HLA-DR
- CD38
Boxplots?
library(openCyto)
library(CytoML) # 1.12.0
library(flowCore) # required for description()
library(flowWorkspace) # required for gh_pop_get_data()
library(here)
library(tidyverse)
library(uwot)
library(ggplot2)
library(scales)
library(patchwork)
library(hues)
library(RColorBrewer)
library(ggrepel)
library(ggpubr)
library(tidyselect)
source(here::here("scripts/20200604_Helper_Functions.R")) # for distributeEvents() and sampleGatingHierarchy()
date <- 20200815
save_output <- FALSE
rerun_dimred <- FALSE
gs <- load_gs(here::here("out/GatingSets/20200815_HAARVI_ICS_GatingSet_AllBatches_with_COMPASS_Subsets"))
## loading R object...
## loading tree object...
## Done
gs2 <- subset(gs, !(`SAMPLE ID` %in% c("37C", "BWT23", "116C", "BWT22")) &
!(`SAMPLE ID` == "551432" & STIM == "Spike 2"))
dput(gh_get_pop_paths(gs2))
## c("root", "/Time", "/Time/LD-3+", "/Time/LD-3+/1419-3+", "/Time/LD-3+/1419-3+/S",
## "/Time/LD-3+/1419-3+/S/Lymph", "/Time/LD-3+/1419-3+/S/Lymph/4+",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/CD38+",
## "/Time/LD-3+/1419-3+/S/Lymph/HLADR+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+/107a",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/154", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+/CCR7+",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD45RA+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_IL17_AND_NOT_IL2_AND_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets", "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/Naive",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/TCM", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/TEMRA",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/TEM", "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/Naive",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/TCM", "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/TEMRA",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/TEM", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/HLADR+CD38+",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/HLADR+CD38+"
## )
cd4_gates_for_dimred <- c(
"/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
"/Time/LD-3+/1419-3+/S/Lymph/4+/TNF",
"/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
"/Time/LD-3+/1419-3+/S/Lymph/CD38+", "/Time/LD-3+/1419-3+/S/Lymph/HLADR+")
cd4_cytokine_gates <- c("/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
"/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
"/Time/LD-3+/1419-3+/S/Lymph/4+/TNF")
cd4_compass_subsets_parentGate <- "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets"
pop_counts <- pData(gs2) %>%
left_join(gs_pop_get_count_fast(gs2, subpopulations = cd4_compass_subsets_parentGate),
by = c("rowname" = "name")) %>%
dplyr::rename(CD4_COMPASS_Subsets = Count) %>%
dplyr::select(rowname, Batch, "SAMPLE ID", STIM, Cohort, CD4_COMPASS_Subsets) %>%
dplyr::filter(!(Cohort %in% c(NA, "Healthy control", "Healthy control 2017-2018")) & STIM != "SEB")
Keep in mind that there is lopsided patient and group representation simply due to not sampling:
cd4_compass_subsets_sampleSizes_4plot <- pop_counts %>%
mutate(Cohort = factor(Cohort,
levels = c("Non-hospitalized", "Hospitalized"),
labels = c("Conv\nNon-Hosp", "Conv\nHosp")))
ggplot(cd4_compass_subsets_sampleSizes_4plot,
aes(factor(Cohort), CD4_COMPASS_Subsets)) +
geom_boxplot(outlier.shape = NA) +
geom_jitter(width = 0.15, height = 0) +
theme_bw(base_size=20) +
labs(title="ICS CD4 UMAP patient representation",
y="CD4+ COMPASS Subset+ Events\n for Dimensionality Reduction\n(not sampled)") +
facet_grid(Batch ~ STIM) +
theme(axis.title.x = element_blank())
# Extract data for dimensionality reduction (not actually sampling)
call_sampleGatingHierarchy_for_cd4 <- function(currentSampleName) {
# print(sprintf("Sampling data from %s", currentSampleName))
sampleGatingHierarchy(gs2[[currentSampleName]], cd4_compass_subsets_parentGate, n = NULL, otherGates = cd4_gates_for_dimred)
}
cd4_compass_subsets_data <- map_dfr(pop_counts$rowname, call_sampleGatingHierarchy_for_cd4)
dim(cd4_compass_subsets_data)
## [1] 119921 55
knitr::kable(head(cd4_compass_subsets_data))
| name | EXPERIMENT NAME | $DATE | SAMPLE ID | PATIENT ID | STIM | WELL ID | PLATE NAME | filename | rowname | Sample ID | Collection date | Cell count | Cohort | Age | Sex | Race | Hispanic? | Days symptom onset to visit 1 | Pair ID | Race_v2 | Batch | /Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets | /Time/LD-3+/1419-3+/S/Lymph/4+/107a | /Time/LD-3+/1419-3+/S/Lymph/4+/154 | /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG | /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 | /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 | /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 | /Time/LD-3+/1419-3+/S/Lymph/4+/TNF | /Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+ | /Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+ | /Time/LD-3+/1419-3+/S/Lymph/CD38+ | /Time/LD-3+/1419-3+/S/Lymph/HLADR+ | Time | FSC-A | FSC-H | SSC-A | SSC-H | CD8b | TNFa | CD107a | CD154 | CD3 ECD | IL2 | CD4 | IL17a | IL4/5/13 | CD14/CD19 | CCR7 | CD38 | L/D | IFNg | CD45RA | HLADR |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1.502 | 115906.84 | 96585 | 27485.91 | 27497 | 1064.57202 | 1440.45300 | 393.4271 | 648.1245 | 2909.432 | 619.5652 | 1705.126 | 861.0718 | 777.4784 | 1025.7701 | 625.7065 | 631.8912 | 1210.5221 | 965.0119 | 1267.6622 | 499.4432 |
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 2.011 | 50676.04 | 36166 | 13068.27 | 12006 | 337.69443 | 592.89911 | 446.3646 | 816.1756 | 2913.806 | 592.0229 | 1654.902 | 486.0011 | 2992.3889 | 979.4083 | 2385.1418 | 1194.2875 | 716.8176 | 820.3757 | 2494.2458 | 561.7028 |
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 2.277 | 95741.76 | 77970 | 31374.81 | 30438 | 404.73019 | 2342.62720 | 421.5775 | 1543.7673 | 2929.278 | 1151.6838 | 1943.653 | 1273.3855 | 859.4286 | 847.0671 | 1992.1693 | 1230.5979 | 1355.7234 | 390.6719 | 1200.6874 | 458.7832 |
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 2.419 | 91918.96 | 80343 | 12862.08 | 12647 | -24.24598 | 260.90768 | 391.5533 | 1706.6123 | 3063.037 | 1603.8656 | 1705.122 | 963.4144 | 852.1685 | 751.9517 | 2187.2285 | 1580.1456 | 1159.5941 | 751.5898 | 2670.7056 | 855.4450 |
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 2.831 | 108113.04 | 96344 | 13686.84 | 13304 | 1261.14404 | 83.61147 | 786.2542 | 477.3380 | 2977.775 | 295.5194 | 1813.501 | 594.5399 | 631.5034 | 1235.4028 | 1751.8550 | 778.6345 | 1021.9509 | 2112.5845 | 1752.8688 | 682.8989 |
| 112405.fcs | 20200528_COVID_ICS-B1 | 28-MAY-2020 | 15545 | 15545 | DMSO | A02 | P2 | 15545_A2_A02_002.fcs | 112405.fcs_332150 | 15545 | 2020-04-24 | N/A | Hospitalized | 63 | F | Asian | N/A | 47 | 7 | Asian | 1 | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 3.067 | 72211.40 | 57974 | 22689.60 | 21602 | 40.91488 | 2249.43335 | 788.5528 | 1994.1859 | 3052.448 | 2396.2529 | 1849.460 | 1073.7261 | 609.6157 | 651.3246 | 2341.2646 | 1157.6080 | 1250.0403 | 234.1040 | 949.8956 | 706.2128 |
Is there maybe one cytokine that is dominating the entire sample?
If one cytokine has very high background expression (and a generous gate), it could be gated positive in a lot of events.
The high number of events expressing this cytokine could lead to it dominating the data, so that most sampled events are positive for this noisy cytokine. It would drown out real signal from other cytokines.
cytokine_dominance <- cd4_compass_subsets_data %>%
group_by(Batch) %>%
summarise_at(cd4_cytokine_gates, sum) %>%
t() %>%
as.data.frame() %>%
set_names(c("B1", "B2", "B3")) %>%
rownames_to_column("Cytokine_Gate") %>%
dplyr::filter(Cytokine_Gate != "Batch")
knitr::kable(cytokine_dominance)
| Cytokine_Gate | B1 | B2 | B3 |
|---|---|---|---|
| /Time/LD-3+/1419-3+/S/Lymph/4+/107a | 10571 | 15787 | 12987 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/154 | 10647 | 12296 | 12535 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG | 7346 | 4490 | 5410 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 | 10328 | 8292 | 9823 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 | 606 | 746 | 1275 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 | 6356 | 9478 | 10858 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/TNF | 11204 | 11027 | 14674 |
cytokine_dominance %>%
pivot_longer(cols = starts_with("B"), names_to = "Batch", values_to = "Events_in_Gate") %>%
mutate(Cytokine = sub(".*4\\+\\/(.*)", "\\1", Cytokine_Gate)) %>%
ggplot(aes(Cytokine, Events_in_Gate, fill = Cytokine)) +
theme_bw(base_size=18) +
geom_bar(stat="identity") +
facet_grid(. ~ Batch) +
labs(title = "CD4 Run Cytokine Dominance by Batch")
cols_4_dimred <- c("CD3 ECD", "CD8b", "CD4",
"TNFa", "CD107a",
"CD154", "IL2", "IL17a",
"IL4/5/13", "IFNg",
"CCR7", "CD45RA",
"CD38", "HLADR")
cd4.scaled_dimred_input <- cd4_compass_subsets_data %>%
dplyr::select(Batch, all_of(cols_4_dimred)) %>%
group_by(Batch) %>%
nest() %>%
ungroup() %>%
mutate(data = lapply(data, function(df) {as.data.frame(scale(as.matrix(df)))})) %>%
unnest(cols = c(data)) %>%
rename_at(vars(all_of(cols_4_dimred)),function(x) paste0(x,".scaled")) %>%
dplyr::select(-Batch)
cd4_compass_subsets_data <- cbind(cd4_compass_subsets_data, cd4.scaled_dimred_input)
# UMAP can take a long time, so there is a rerun_dimred switch
if(rerun_dimred) {
print("Running UMAP")
set.seed(date)
print(Sys.time())
cd4_compass_subsets_dimred_out <- cd4_compass_subsets_data %>%
# Run CD3, co-receptor, cytokine, memory, and activation markers through UMAP
dplyr::select(all_of(paste0(cols_4_dimred, ".scaled"))) %>%
uwot::umap(spread = 9, min_dist = 0.02, n_threads = 7)
print(Sys.time())
cd4_compass_subsets_w_umap <- cbind(as.data.frame(cd4_compass_subsets_dimred_out) %>%
dplyr::rename(x.umap = V1, y.umap = V2),
cd4_compass_subsets_data)
if(save_output) {
saveRDS(cd4_compass_subsets_w_umap, here::here(sprintf("out/UMAP/%s_ICS_CD4_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
}
} else {
# Assuming UMAP results are already saved
print("Loading saved UMAP run")
cd4_compass_subsets_w_umap <- readRDS(here::here(sprintf("out/UMAP/%s_ICS_CD4_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
}
## [1] "Loading saved UMAP run"
Shuffle data frame rows so e.g. Batch 3 doesn’t dominate foreground
set.seed(date)
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap[sample(nrow(cd4_compass_subsets_w_umap), nrow(cd4_compass_subsets_w_umap)),]
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
here::here("data/Arial_afm/ArialMT-Bold.afm"),
here::here("data/Arial_afm/ArialMT-Italic.afm"),
here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)
boolColorScheme <- c("FALSE" = "#E2E2E2", "TRUE" = "#023FA5")
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap %>%
mutate(cytokine_degree = rowSums(dplyr::select(., all_of(cd4_cytokine_gates))))
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap %>%
mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")),
STIM = factor(STIM, levels = c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")))
stim_labs <- c("DMSO", "S1", "S2", "NCAP", "VEMP")
names(stim_labs) <- c("DMSO", "Spike 1", "Spike 2", "NCAP", "VEMP")
cohort_labs <- c("Conv\nNon-Hosp", "Conv\nHosp")
names(cohort_labs) <- c("Non-hospitalized", "Hospitalized")
base_dimred_plot <- function(currentColumn, pointSize = 0.02, colorScheme = NA) {
p <- ggplot(cd4_compass_subsets_w_umap, aes(x=x.umap, y=y.umap,
colour=if(currentColumn %in% c("Batch", "SAMPLE ID")) {
factor(!!as.name(currentColumn))
} else {
as.logical(!!as.name(currentColumn))
})) +
geom_point(shape=20, alpha=0.8, size=pointSize) +
facet_grid(Cohort ~ STIM, switch="y",
labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
theme_bw() +
theme(plot.title=element_text(hjust = 0.5, size=22, face="bold"),
axis.text=element_blank(),
axis.line=element_blank(),
axis.ticks=element_blank(),
axis.title=element_blank(),
strip.text=element_text(face="bold", size=22),
panel.grid.major = element_blank(),
legend.title=element_text(face="bold", size=14),
strip.text.x = element_text(margin = margin(0.15,0,0.15,0, "cm")))
if(!anyNA(colorScheme)) {
p <- p + scale_color_manual(values = colorScheme)
}
if(currentColumn %in% c("Batch", "SAMPLE ID")) {
p <- p + labs(color = currentColumn) +
guides(colour = guide_legend(override.aes = list(size=10))) +
theme(legend.title = element_text(size=15),
legend.text = element_text(size=15),
legend.position = "bottom") +
scale_colour_manual(values=as.character(iwanthue(length(unique(cd4_compass_subsets_w_umap[,currentColumn])))))
} else {
p <- p + theme(legend.position = "none")
}
p
}
base_dimred_plot("Batch")
base_dimred_plot("SAMPLE ID")
Now visualize the cytokine, memory, and activation expression localization
for(cg in cd4_gates_for_dimred) {
print(base_dimred_plot(cg, colorScheme = boolColorScheme) +
labs(title = sprintf("CD4+ COMPASS Subset+ UMAP\nColored by %s", sub(".*\\/([^(\\/)]+)", "\\1", cg))))
}